Case study in solving problems with Polars and Altair
Duke University Libraries
Center for Data and Visualization Sciences
Faculty need a quick way of summarizing busy and free times for the group of students they advise
Students’ schedules
Students’ advisors
All schedules/advisor
| StudentID | StudentName | Descr | Subject | Pat | Mtg Start | Mtg End |
|---|---|---|---|---|---|---|
| str | str | str | str | str | str | str |
| "062264a" | "Joseph Garner" | "INTRODUCTORY MECHANICS" | "PHYSICS" | "WF" | "1:25:00 PM" | "2:40:00 PM" |
| "062264a" | "Joseph Garner" | "ACADEMIC WRITING" | "WRITING" | "WF" | "3:05:00 PM" | "4:20:00 PM" |
| "062264a" | "Joseph Garner" | "MATRICES AND VECTORS" | "MATH" | "TTH" | "10:05:00 AM" | "11:20:00 AM" |
| "062264a" | "Joseph Garner" | "INTRO TO SIGNALS AND SYSTEMS" | "ECE" | "F" | "10:05:00 AM" | "12:55:00 PM" |
| "062264a" | "Joseph Garner" | "ADV TOPICS IN DEEP LEARNING" | "ECE" | "MW" | "11:45:00 AM" | "1:00:00 PM" |
| StudentID | StudentName | Descr | Subject | Pat | Mtg Start | Mtg End |
|---|---|---|---|---|---|---|
| str | str | str | str | str | str | str |
| "062264a" | "Joseph Garner" | "INTRODUCTORY MECHANICS" | "PHYSICS" | "WF" | "1:25:00 PM" | "2:40:00 PM" |
| "062264a" | "Joseph Garner" | "ACADEMIC WRITING" | "WRITING" | "WF" | "3:05:00 PM" | "4:20:00 PM" |
| "062264a" | "Joseph Garner" | "MATRICES AND VECTORS" | "MATH" | "TTH" | "10:05:00 AM" | "11:20:00 AM" |
| "062264a" | "Joseph Garner" | "INTRO TO SIGNALS AND SYSTEMS" | "ECE" | "F" | "10:05:00 AM" | "12:55:00 PM" |
| "062264a" | "Joseph Garner" | "ADV TOPICS IN DEEP LEARNING" | "ECE" | "MW" | "11:45:00 AM" | "1:00:00 PM" |
students_df = (pl.read_csv('./data/students_deidentified.csv')
.with_columns(start=pl.col('Mtg Start').str.to_time('%r'),
end=pl.col('Mtg End').str.to_time('%r'))
.drop(pl.col('Mtg Start','Mtg End'))
)
print('Full DataFrame shape:', students_df.shape)
students_df.head(10)Full DataFrame shape: (2976, 7)
| StudentID | StudentName | Descr | Subject | Pat | start | end |
|---|---|---|---|---|---|---|
| str | str | str | str | str | time | time |
| "062264a" | "Joseph Garner" | "INTRODUCTORY MECHANICS" | "PHYSICS" | "WF" | 13:25:00 | 14:40:00 |
| "062264a" | "Joseph Garner" | "ACADEMIC WRITING" | "WRITING" | "WF" | 15:05:00 | 16:20:00 |
| "062264a" | "Joseph Garner" | "MATRICES AND VECTORS" | "MATH" | "TTH" | 10:05:00 | 11:20:00 |
| "062264a" | "Joseph Garner" | "INTRO TO SIGNALS AND SYSTEMS" | "ECE" | "F" | 10:05:00 | 12:55:00 |
| "062264a" | "Joseph Garner" | "ADV TOPICS IN DEEP LEARNING" | "ECE" | "MW" | 11:45:00 | 13:00:00 |
| "062264a" | "Joseph Garner" | "INTRODUCTORY MECHANICS" | "PHYSICS" | "M" | 13:30:00 | 15:30:00 |
| "062264a" | "Joseph Garner" | "INTRODUCTORY MECHANICS" | "PHYSICS" | "T" | 13:30:00 | 15:30:00 |
| "062264a" | "Joseph Garner" | "HOUSE COURSE (SP TOP)" | "HOUSECS" | "M" | 17:15:00 | 18:45:00 |
| "74b05b7" | "Vanessa Thompson" | "INTERMEDIATE MECHANICS" | "PHYSICS" | "WF" | 10:05:00 | 11:20:00 |
| "74b05b7" | "Vanessa Thompson" | "INTRO EXPERIMENTAL PHYSICS I" | "PHYSICS" | "T" | 15:45:00 | 17:45:00 |
(pl.read_csv('./data/students_deidentified.csv')
.with_columns(start=pl.col('Mtg Start').str.to_time('%r'),
end=pl.col('Mtg End').str.to_time('%r'))
.drop(pl.col('Mtg Start','Mtg End'))
)read_csv()(pl.read_csv('./data/students_deidentified.csv')
.with_columns(start=pl.col('Mtg Start').str.to_time('%r'),
end=pl.col('Mtg End').str.to_time('%r'))
.drop(pl.col('Mtg Start','Mtg End'))
).with_columns() – adds new or modifies existing columns(pl.read_csv('./data/students_deidentified.csv')
.with_columns(pl.col('Mtg Start').str.to_time('%r').alias('start'),
end=pl.col('Mtg End').str.to_time('%r'))
.drop(pl.col('Mtg Start','Mtg End'))
)Two format options:
expression.alias('name')
or
name=expression
.alias() after name=(pl.read_csv('./data/students_deidentified.csv')
.with_columns(start=pl.col('Mtg Start').str.to_time('%r'),
end=pl.col('Mtg End').str.to_time('%r'))
.drop(pl.col('Mtg Start','Mtg End'))
)time data type for later manipulation.str.to_time() [format options documentation]%r is 12-hr time with AM/PM(pl.read_csv('./data/students_deidentified.csv')
.with_columns(start=pl.col('Mtg Start').str.to_time('%r'),
end=pl.col('Mtg End').str.to_time('%r'))
.drop(pl.col('Mtg Start','Mtg End'))
).drop() – only drops columns, unlike Pandas(pl.read_csv('./data/students_deidentified.csv')
.with_columns(start=pl.col('Mtg Start').str.to_time('%r'),
end=pl.col('Mtg End').str.to_time('%r'))
.drop(pl.col('Mtg Start','Mtg End'))
)Two format options:
pl.col('name')← Polars expression
or
'name'← plain string shortcut
| StudentID | StudentName | AdvisorName | AdvisorID |
|---|---|---|---|
| str | str | str | str |
| "f1010e9" | "Misty Lee" | "Sheri Mosley" | "28db778" |
| "7d8a6c1" | "Rebecca Thomas" | "Sheri Mosley" | "28db778" |
| "3d66893" | "Karen Clark" | "Sheri Mosley" | "28db778" |
| "f151ba8" | "Maxwell Kirby" | "Sheri Mosley" | "28db778" |
| "d48c1d6" | "Maria Robertson" | "Sheri Mosley" | "28db778" |
| … | … | … | … |
| "fde1944" | "Gina Wolfe" | "Philip Gallagher" | "5cb893c" |
| "1315a19" | "Dalton Wu" | "Philip Gallagher" | "5cb893c" |
| "52fbc5f" | "Heidi Stafford" | "Philip Gallagher" | "5cb893c" |
| "13704ca" | "Neil Caldwell" | "Philip Gallagher" | "5cb893c" |
| "dc434aa" | "Brooke Whitney" | "Philip Gallagher" | "5cb893c" |
| StudentID | StudentName | Descr | Subject | Pat | start | end | AdvisorName | AdvisorID |
|---|---|---|---|---|---|---|---|---|
| str | str | str | str | str | time | time | str | str |
| "36d204f" | "Jermaine Carpenter" | "INTERMEDIATE MECHANICS" | "PHYSICS" | "WF" | 11:45:00 | 13:00:00 | "Philip Gallagher" | "5cb893c" |
| "f73ec3a" | "Tonya Boone" | "ACADEMIC WRITING" | "WRITING" | "WF" | 11:45:00 | 13:00:00 | "Maurice Maldonado" | "5af7c52" |
| "f86df8f" | "Jerry Baker" | "DISCRETE MATH FOR COMPSCI" | "COMPSCI" | "F" | 15:05:00 | 16:20:00 | "Angie Moon" | "e7e131b" |
| "ae7e7ed" | "Lydia Alexander" | "INTRO EXPERIMENTAL PHYSICS I" | "PHYSICS" | "T" | 13:30:00 | 15:30:00 | "Lindsey Burnett" | "c0d3c80" |
| "04eca94" | "Paula Guerrero" | "INTERMEDIATE MECHANICS" | "PHYSICS" | "T" | 18:00:00 | 20:00:00 | "Lindsey Burnett" | "c0d3c80" |
| "acf7fb1" | "Suzanne Zimmerman" | "REAL ANALYSIS I" | "MATH" | "TH" | 10:05:00 | 11:20:00 | "Luke Pace" | "4f3f9d1" |
| "a763934" | "Jim Fisher" | "ENGR DESIGN & COMMUNICATION" | "EGR" | "TH" | 10:05:00 | 12:45:00 | "No Advisor" | null |
| "b024071" | "Shawna Gregory" | "INTERMEDIATE MECHANICS" | "PHYSICS" | "WF" | 11:45:00 | 13:00:00 | "Brett Mejia" | "f13c9eb" |
| "c1d9a1e" | "Nathaniel Werner" | "INTRO EXPERIMENTAL PHYSICS I" | "PHYSICS" | "M" | 15:45:00 | 17:45:00 | "Yolanda Meyers" | "08a516d" |
| "e4a781d" | "Devin Shaffer" | "MECHANICS OF SOLIDS" | "EGR" | "T" | 13:25:00 | 14:40:00 | "Earl Wiley" | "c16ca42" |
(students_advisors_df
.filter(pl.col('AdvisorName') == "No Advisor")
.select(pl.col('StudentID','StudentName'))
.unique()
)| StudentID | StudentName |
|---|---|
| str | str |
| "99c59cc" | "Bruce Pratt" |
| "a1964aa" | "Christian Stewart" |
| "6a2617f" | "Leslie Willis" |
| "630cfdd" | "Beverly Wright" |
| "368d26d" | "Brent Lopez" |
| … | … |
| "a763934" | "Jim Fisher" |
| "c299e53" | "Crystal Becker" |
| "14aafd7" | "David Lewis" |
| "42366d3" | "Kenneth Chandler" |
| "55bd99b" | "Elizabeth Johnson" |
(students_advisors_df
.filter(pl.col('AdvisorName') == "No Advisor")
.select(pl.col('StudentID','StudentName'))
.unique()
).filter()
Images: Janssens, J., & Nieuwdorp, T. (2025). Python Polars: The Definitive Guide. O’Reilly Media.
(students_advisors_df
.filter(pl.col('AdvisorName') == "No Advisor")
.select(pl.col('StudentID','StudentName'))
.unique()
).filter()
.select()
Images: Janssens, J., & Nieuwdorp, T. (2025). Python Polars: The Definitive Guide. O’Reilly Media.
(students_advisors_df
.filter(pl.col('AdvisorName') == "No Advisor")
.select(pl.col('StudentID','StudentName'))
.unique()
)| StudentID | StudentName |
|---|---|
| str | str |
| "9ff23f4" | "Randy Williams" |
| "dce891d" | "Leonard Armstrong" |
| "6a2617f" | "Leslie Willis" |
| "1ef6826" | "Brendan Dodson" |
| "9d04ed2" | "Caitlin Adams" |
| … | … |
| "a763934" | "Jim Fisher" |
| "5fe5193" | "Daniel Miller" |
| "368d26d" | "Brent Lopez" |
| "de5e9c9" | "Jenny Reyes" |
| "55bd99b" | "Elizabeth Johnson" |
(students_advisors_df
.filter(pl.col('start').is_null())
.group_by(pl.col('Subject','Descr'))
.agg(count = pl.col('StudentID').count())
.sort('count', descending=True)
)| Subject | Descr | count |
|---|---|---|
| str | str | u32 |
| "EGR" | "RESEARCH PROJECTS IN EGR" | 52 |
| "CEE" | "ENGINEERING THE PLANET" | 10 |
| "MUSIC" | "SYMPHONY ORCHESTRA" | 2 |
| "MUSIC" | "INTRO GUITAR CLASS" | 2 |
| "EGR" | "DESIGN TO DELIVER" | 1 |
| … | … | … |
| "MUSIC" | "FLUTE" | 1 |
| "ISS" | "INFORMATION, SOCIETY & CULTURE" | 1 |
| "MUSIC" | "CLARINET" | 1 |
| "MUSIC" | "MEET THE BEATLES AND THE 1960S" | 1 |
| "MUSIC" | "THEOR/PRAC TONAL MUS I" | 1 |
(students_advisors_df
.filter(pl.col('start').is_null())
.group_by(pl.col('Subject','Descr'))
.agg(count = pl.col('StudentID').count())
.sort('count', descending=True)
).filter() lets through what is True(students_advisors_df
.filter(pl.col('start').is_null())
.group_by(pl.col('Subject','Descr'))
.agg(count = pl.col('StudentID').count())
.sort('count', descending=True)
).group_by() returns grouping variables as normal columnsIndex in Polars DataFrames(students_advisors_df
.filter(pl.col('start').is_null())
.group_by(pl.col('Subject','Descr'))
.agg(count = pl.col('StudentID').count())
.sort('count', descending=True)
).agg() separated by commas(students_advisors_df
.filter(pl.col('start').is_null())
.group_by(pl.col('Subject','Descr'))
.agg(count = pl.col('StudentID').count())
.sort('count', descending=True)
)ascending=False(students_advisors_df
.filter(pl.col('start').is_null())
.group_by(pl.col('Subject','Descr'))
.agg(count = pl.col('StudentID').count())
.sort('count', descending=True)
)| Subject | Descr | count |
|---|---|---|
| str | str | u32 |
| "EGR" | "RESEARCH PROJECTS IN EGR" | 52 |
| "CEE" | "ENGINEERING THE PLANET" | 10 |
| "MUSIC" | "INTRO GUITAR CLASS" | 2 |
| "MUSIC" | "SYMPHONY ORCHESTRA" | 2 |
| "MUSIC" | "WIND SYMPHONY" | 1 |
| … | … | … |
| "HLTHPOL" | "BASS CONNECTION HEALTH POLICY" | 1 |
| "EGR" | "DESIGN TO DELIVER" | 1 |
| "MUSIC" | "CLARINET" | 1 |
| "MUSIC" | "THEOR/PRAC TONAL MUS I" | 1 |
| "MUSIC" | "SAXOPHONE" | 1 |
| StudentID | StudentName | Descr | Subject | Pat | start | end | AdvisorName | AdvisorID |
|---|---|---|---|---|---|---|---|---|
| 062264a | Joseph Garner | INTRODUCTORY MECHANICS | PHYSICS | WF | 13:25:00 | 14:40:00 | Henry Schroeder | e6d6592 |
| 062264a | Joseph Garner | ACADEMIC WRITING | WRITING | WF | 15:05:00 | 16:20:00 | Henry Schroeder | e6d6592 |
| 062264a | Joseph Garner | MATRICES AND VECTORS | MATH | TTH | 10:05:00 | 11:20:00 | Henry Schroeder | e6d6592 |
| 062264a | Joseph Garner | INTRO TO SIGNALS AND SYSTEMS | ECE | F | 10:05:00 | 12:55:00 | Henry Schroeder | e6d6592 |
| 062264a | Joseph Garner | ADV TOPICS IN DEEP LEARNING | ECE | MW | 11:45:00 | 13:00:00 | Henry Schroeder | e6d6592 |
| 062264a | Joseph Garner | INTRODUCTORY MECHANICS | PHYSICS | M | 13:30:00 | 15:30:00 | Henry Schroeder | e6d6592 |
| 062264a | Joseph Garner | INTRODUCTORY MECHANICS | PHYSICS | T | 13:30:00 | 15:30:00 | Henry Schroeder | e6d6592 |
| 062264a | Joseph Garner | HOUSE COURSE (SP TOP) | HOUSECS | M | 17:15:00 | 18:45:00 | Henry Schroeder | e6d6592 |
| 74b05b7 | Vanessa Thompson | INTERMEDIATE MECHANICS | PHYSICS | WF | 10:05:00 | 11:20:00 | Dan Sims | eced0a0 |
| 74b05b7 | Vanessa Thompson | INTRO EXPERIMENTAL PHYSICS I | PHYSICS | T | 15:45:00 | 17:45:00 | Dan Sims | eced0a0 |
great_tablesfrom great_tables import loc, style
(students_advisors_df.head(6).style
.tab_style(
style=[style.text(weight='bold'), style.fill("#FCF7E5")],
locations=loc.body(columns="Pat")
)
)| StudentID | StudentName | Descr | Subject | Pat | start | end | AdvisorName | AdvisorID |
|---|---|---|---|---|---|---|---|---|
| 062264a | Joseph Garner | INTRODUCTORY MECHANICS | PHYSICS | WF | 13:25:00 | 14:40:00 | Henry Schroeder | e6d6592 |
| 062264a | Joseph Garner | ACADEMIC WRITING | WRITING | WF | 15:05:00 | 16:20:00 | Henry Schroeder | e6d6592 |
| 062264a | Joseph Garner | MATRICES AND VECTORS | MATH | TTH | 10:05:00 | 11:20:00 | Henry Schroeder | e6d6592 |
| 062264a | Joseph Garner | INTRO TO SIGNALS AND SYSTEMS | ECE | F | 10:05:00 | 12:55:00 | Henry Schroeder | e6d6592 |
| 062264a | Joseph Garner | ADV TOPICS IN DEEP LEARNING | ECE | MW | 11:45:00 | 13:00:00 | Henry Schroeder | e6d6592 |
| 062264a | Joseph Garner | INTRODUCTORY MECHANICS | PHYSICS | M | 13:30:00 | 15:30:00 | Henry Schroeder | e6d6592 |
Styling docs: https://docs.pola.rs/user-guide/misc/styling/
days_version = "Abbreviation"
# days_version = "Full"
classdays_abbrev_dict = {'TTH':['Tues','Thurs'],
'MW':['Mon','Wed'],
'MTW':['Mon','Tues','Wed'],
'MTTH':['Mon','Tues','Thurs'],
'M':['Mon'],
'WF':['Wed','Fri'],
'F':['Fri'],
'TH':['Thurs'],
'MWF':['Mon','Wed','Fri'],
'MTH':['Mon','Thurs'],
'MTWF':['Mon','Tues','Wed','Fri'],
'MF':['Mon','Fri'],
'M-TH':['Mon','Tues','Wed','Thurs'],
'M-F':['Mon','Tues','Wed','Thurs','Fri'],
'T':['Tues'],
'MT':['Mon','Tues'],
'W':['Wed'],
'TF':['Tues','Fri']}
classdays_full_dict = {'TTH':['Tuesday','Thursday'],
'MW':['Monday','Wednesday'],
'MTW':['Monday','Tuesday','Wednesday'],
'MTTH':['Monday','Tuesday','Thursday'],
'M':['Monday'],
'WF':['Wednesday','Friday'],
'F':['Friday'],
'TH':['Thursday'],
'MWF':['Monday','Wednesday','Friday'],
'MTH':['Monday','Thursday'],
'MTWF':['Monday','Tuesday','Wednesday','Friday'],
'MF':['Monday','Friday'],
'M-TH':['Monday','Tuesday','Wednesday','Thursday'],
'M-F':['Monday','Tuesday','Wednesday','Thursday','Friday'],
'T':['Tuesday'],
'MT':['Monday','Tuesday'],
'W':['Wednesday'],
'TF':['Tuesday','Friday']}
if days_version.lower() == "abbreviation":
classdays_dict = classdays_abbrev_dict
else:
classdays_dict = classdays_full_dict# days_version = "full"
days_version = "abbreviation"
days_letters = ['M', 'T', 'W', 'H', 'F']
days_list_full = ['Monday', 'Tuesday', 'Wednesday', 'Thursday', 'Friday']
days_list_abbrev = ['Mon', 'Tues', 'Wed', 'Thurs', 'Fri']
# Using Pandas Series since you can use slice notation on them for day ranges with dashes
if days_version.lower() == "full":
classdays_series = pd.Series(dict(zip(days_letters, days_list_full)))
else:
classdays_series = pd.Series(dict(zip(days_letters, days_list_abbrev)))
classdays_dict = {}
for day_pattern in students_advisors_df.get_column('Pat').unique().to_list():
if day_pattern is not None: # There are some null day patterns
day_pattern_noTH = day_pattern.replace('TH','H') # Thurs only two-character abbrev
if '-' in day_pattern_noTH:
# Handle patterns like 'M-W', 'T-H', etc.
match = re.search(r'([A-Z])-([A-Z])', day_pattern_noTH)
classdays_dict[day_pattern] = classdays_series[slice(match.group(1),match.group(2))].to_list()
else:
# Handle single day patterns like 'MWF', 'TTH', etc.
classdays_dict[day_pattern] = classdays_series[list(day_pattern_noTH)].to_list()
classdays_dict{'TH': ['Thurs'],
'M': ['Mon'],
'TTH': ['Tues', 'Thurs'],
'WF': ['Wed', 'Fri'],
'MW': ['Mon', 'Wed'],
'MWF': ['Mon', 'Wed', 'Fri'],
'W': ['Wed'],
'F': ['Fri'],
'MF': ['Mon', 'Fri'],
'T': ['Tues'],
'MTTH': ['Mon', 'Tues', 'Thurs']}
| Pat | weekday |
|---|---|
| str | list[str] |
| "WF" | ["Wed", "Fri"] |
| "WF" | ["Wed", "Fri"] |
| "TTH" | ["Tues", "Thurs"] |
| "F" | ["Fri"] |
| "MW" | ["Mon", "Wed"] |
| … | … |
| "T" | ["Tues"] |
| "TTH" | ["Tues", "Thurs"] |
| "MW" | ["Mon", "Wed"] |
| "W" | ["Wed"] |
| "TTH" | ["Tues", "Thurs"] |
sa_w_weekdays = (students_advisors_df
.with_columns(weekday=pl.col('Pat').replace_strict(classdays_dict))
.explode('weekday')
)
sa_w_weekdays.select(pl.col('StudentName','Descr','Pat','weekday'))| StudentName | Descr | Pat | weekday |
|---|---|---|---|
| str | str | str | str |
| "Joseph Garner" | "INTRODUCTORY MECHANICS" | "WF" | "Wed" |
| "Joseph Garner" | "INTRODUCTORY MECHANICS" | "WF" | "Fri" |
| "Joseph Garner" | "ACADEMIC WRITING" | "WF" | "Wed" |
| "Joseph Garner" | "ACADEMIC WRITING" | "WF" | "Fri" |
| "Joseph Garner" | "MATRICES AND VECTORS" | "TTH" | "Tues" |
| … | … | … | … |
| "Judith Norris" | "ADV TOPICS IN DEEP LEARNING" | "MW" | "Mon" |
| "Judith Norris" | "ADV TOPICS IN DEEP LEARNING" | "MW" | "Wed" |
| "Judith Norris" | "INTRO TO SIGNALS AND SYSTEMS" | "W" | "Wed" |
| "Judith Norris" | "MATRICES AND VECTORS" | "TTH" | "Tues" |
| "Judith Norris" | "MATRICES AND VECTORS" | "TTH" | "Thurs" |
time_ranges()closed='left' so don’t include the end time in listtime_ranges()closed='left' so don’t include the end time in list| StudentName | weekday | start | end | Time |
|---|---|---|---|---|
| str | str | time | time | list[time] |
| "Joseph Garner" | "Wed" | 13:25:00 | 14:40:00 | [13:25:00, 13:30:00, … 14:35:00] |
| "Joseph Garner" | "Fri" | 13:25:00 | 14:40:00 | [13:25:00, 13:30:00, … 14:35:00] |
| "Joseph Garner" | "Wed" | 15:05:00 | 16:20:00 | [15:05:00, 15:10:00, … 16:15:00] |
| "Joseph Garner" | "Fri" | 15:05:00 | 16:20:00 | [15:05:00, 15:10:00, … 16:15:00] |
| "Joseph Garner" | "Tues" | 10:05:00 | 11:20:00 | [10:05:00, 10:10:00, … 11:15:00] |
| … | … | … | … | … |
| "Judith Norris" | "Mon" | 15:05:00 | 16:20:00 | [15:05:00, 15:10:00, … 16:15:00] |
| "Judith Norris" | "Wed" | 15:05:00 | 16:20:00 | [15:05:00, 15:10:00, … 16:15:00] |
| "Judith Norris" | "Wed" | 10:05:00 | 12:55:00 | [10:05:00, 10:10:00, … 12:50:00] |
| "Judith Norris" | "Tues" | 08:30:00 | 09:45:00 | [08:30:00, 08:35:00, … 09:40:00] |
| "Judith Norris" | "Thurs" | 08:30:00 | 09:45:00 | [08:30:00, 08:35:00, … 09:40:00] |
closed='left' so don’t include the end time in list(sa_w_weekdays
.with_columns(Time=pl.time_ranges("start", "end", interval='5m', closed='left'))
.select(pl.col('StudentName','weekday','start','end','Time'))
.explode('Time')
)| StudentName | weekday | start | end | Time |
|---|---|---|---|---|
| str | str | time | time | time |
| "Joseph Garner" | "Wed" | 13:25:00 | 14:40:00 | 13:25:00 |
| "Joseph Garner" | "Wed" | 13:25:00 | 14:40:00 | 13:30:00 |
| "Joseph Garner" | "Wed" | 13:25:00 | 14:40:00 | 13:35:00 |
| "Joseph Garner" | "Wed" | 13:25:00 | 14:40:00 | 13:40:00 |
| "Joseph Garner" | "Wed" | 13:25:00 | 14:40:00 | 13:45:00 |
| … | … | … | … | … |
| "Judith Norris" | "Thurs" | 08:30:00 | 09:45:00 | 09:20:00 |
| "Judith Norris" | "Thurs" | 08:30:00 | 09:45:00 | 09:25:00 |
| "Judith Norris" | "Thurs" | 08:30:00 | 09:45:00 | 09:30:00 |
| "Judith Norris" | "Thurs" | 08:30:00 | 09:45:00 | 09:35:00 |
| "Judith Norris" | "Thurs" | 08:30:00 | 09:45:00 | 09:40:00 |
class_day_time_df = (
sa_w_weekdays
.with_columns(Time=pl.time_ranges("start", "end", interval='5m', closed='left'))
.explode('Time')
.select(pl.col('Time').dt.strftime("%H:%M"),
pl.col('weekday').alias('Day'),
pl.col('Descr').alias('Class'),
pl.col('StudentName'),
pl.col('AdvisorName')
)
)
class_day_time_df.alias() form in .select() since prefer this column order and can’t put “positional” arguments after “keyword” ones| Time | Day | Class | StudentName | AdvisorName |
|---|---|---|---|---|
| str | str | str | str | str |
| "13:25" | "Wed" | "INTRODUCTORY MECHANICS" | "Joseph Garner" | "Henry Schroeder" |
| "13:30" | "Wed" | "INTRODUCTORY MECHANICS" | "Joseph Garner" | "Henry Schroeder" |
| "13:35" | "Wed" | "INTRODUCTORY MECHANICS" | "Joseph Garner" | "Henry Schroeder" |
| "13:40" | "Wed" | "INTRODUCTORY MECHANICS" | "Joseph Garner" | "Henry Schroeder" |
| "13:45" | "Wed" | "INTRODUCTORY MECHANICS" | "Joseph Garner" | "Henry Schroeder" |
| … | … | … | … | … |
| "09:20" | "Thurs" | "MATRICES AND VECTORS" | "Judith Norris" | "Angie Moon" |
| "09:25" | "Thurs" | "MATRICES AND VECTORS" | "Judith Norris" | "Angie Moon" |
| "09:30" | "Thurs" | "MATRICES AND VECTORS" | "Judith Norris" | "Angie Moon" |
| "09:35" | "Thurs" | "MATRICES AND VECTORS" | "Judith Norris" | "Angie Moon" |
| "09:40" | "Thurs" | "MATRICES AND VECTORS" | "Judith Norris" | "Angie Moon" |
times_list = (class_day_time_df
.get_column('Time')
.unique()
.sort()
.to_list()
)
# Look at a few entries...
print(times_list[:5])
# Sets the visualization column order
if days_version.lower()=="abbreviation":
days_list = ['Mon', 'Tues', 'Wed', 'Thurs', 'Fri']
else:
days_list = ['Monday','Tuesday','Wednesday','Thursday','Friday']
advisors_list = (class_day_time_df
.get_column('AdvisorName')
.unique()
.sort()
.to_list()
)
print('There are', len(advisors_list), 'advisors')[None, '08:30', '08:35', '08:40', '08:45']
There are 27 advisors
| Day | Time | Count | Students |
|---|---|---|---|
| str | str | u32 | str |
| "Tues" | "09:20" | 3 | "Taylor Freeman; Elaine Drake; … |
| "Mon" | "10:35" | 8 | "Wendy Barr; Ralph Hernandez; C… |
| "Thurs" | "14:15" | 3 | "Taylor Freeman; Elaine Drake; … |
| "Mon" | "13:50" | 8 | "Lori Bennett; Wendy Barr; Ralp… |
| "Thurs" | "09:10" | 2 | "Taylor Freeman; Joanne Torres" |
| … | … | … | … |
| "Wed" | "17:25" | 3 | "Lori Bennett; Taylor Freeman; … |
| "Fri" | "14:10" | 4 | "Ralph Hernandez; Francisco Jua… |
| "Wed" | "12:30" | 8 | "Lori Bennett; Wendy Barr; Ralp… |
| "Tues" | "17:35" | 2 | "Wendy Barr; Francisco Juarez" |
| "Tues" | "12:55" | 6 | "Lori Bennett; Wendy Barr; Tayl… |
single_advisor_df = (
class_day_time_df
.filter(pl.col('AdvisorName') == advisor)
.group_by(pl.col('AdvisorName','Day','Time'))
.agg(Count=pl.col('Class').count(),
Students=pl.col('StudentName').str.join('; '))
)
single_advisor_df.plot.rect(
x='Day',
y='Time',
color='Count',
tooltip=['Day','Time','Count','Students']
)single_advisor_df = (
class_day_time_df
.filter(pl.col('AdvisorName') == advisor)
.group_by(pl.col('AdvisorName','Day','Time'))
.agg(Count=pl.col('Class').count(),
Students=pl.col('StudentName').str.join('; '))
)
single_advisor_df.plot.rect(
x='Day:O',
y='Time:O',
color='Count:Q',
tooltip=['Day','Time','Count','Students']
).properties(
width=180,
height=500
)(class_day_time_df
.filter(pl.col('AdvisorName') == advisor)
.group_by(pl.col('AdvisorName','Day','Time'))
.agg(Count=pl.col('Class').count(),
Students=pl.col('StudentName').str.join('; '))
).plot.rect(
x='Day:O',
y='Time:O',
color='Count:Q',
tooltip=['Day','Time','Count','Students']
).properties(
width=180,
height=500
)(class_day_time_df
.filter(pl.col('AdvisorName') == advisor)
.group_by(pl.col('AdvisorName','Day','Time'))
.agg(Count=pl.col('Class').count(),
Students=pl.col('StudentName').str.join('; '))
.plot.rect(
x='Day:O',
y='Time:O',
color='Count:Q',
tooltip=['Day','Time','Count','Students']
).properties(
width=180,
height=500
)
)(class_day_time_df
.filter(pl.col('AdvisorName') == advisor)
.group_by(pl.col('AdvisorName','Day','Time'))
.agg(Count=pl.col('Class').count(),
Students=pl.col('StudentName').str.join('; '))
.plot.rect(
x=alt.X('Day:O'),
y=alt.Y('Time:O'),
color=alt.Color('Count:Q'),
tooltip=['Day','Time','Count','Students']
).properties(
width=180,
height=500
)
)(class_day_time_df
.filter(pl.col('AdvisorName') == advisor)
.group_by(pl.col('AdvisorName','Day','Time'))
.agg(Count=pl.col('Class').count(),
Students=pl.col('StudentName').str.join('; '))
.plot.rect(
x=alt.X('Day:O',
axis=alt.Axis(labelAngle=0),
sort=days_list,
scale=alt.Scale(domain=days_list),
title=''),
y=alt.Y('Time:O'),
color=alt.Color('Count:Q'),
tooltip=['Day','Time','Count','Students']
).properties(
width=180,
height=500
)
)(class_day_time_df
.filter(pl.col('AdvisorName') == advisor)
.group_by(pl.col('AdvisorName','Day','Time'))
.agg(Count=pl.col('Class').count(),
Students=pl.col('StudentName').str.join('; '))
.plot.rect(
x=alt.X('Day:O',
axis=alt.Axis(labelAngle=0),
sort=days_list,
scale=alt.Scale(domain=days_list),
title=''),
y=alt.Y('Time:O',
title='time of day',
scale=alt.Scale(domain=times_list),
axis=alt.Axis(labelOverlap=True)),
color=alt.Color('Count:Q'),
tooltip=['Day','Time','Count','Students']
).properties(
width=180,
height=500
)
)(class_day_time_df
.filter(pl.col('AdvisorName') == advisor)
.group_by(pl.col('AdvisorName','Day','Time'))
.agg(Count=pl.col('Class').count(),
Students=pl.col('StudentName').str.join('; '))
.plot.rect(
x=alt.X('Day:O',
axis=alt.Axis(labelAngle=0),
sort=days_list,
scale=alt.Scale(domain=days_list),
title=''),
y=alt.Y('Time:O',
title='time of day',
scale=alt.Scale(domain=times_list),
axis=alt.Axis(labelOverlap=True)),
color=alt.Color('Count:Q',
scale=alt.Scale(scheme='blues'),
legend=alt.Legend(title='# students')),
tooltip=['Day','Time','Count','Students']
).properties(
width=180,
height=500
)
)(class_day_time_df
.filter(pl.col('AdvisorName') == advisor)
.group_by(pl.col('AdvisorName','Day','Time'))
.agg(Count=pl.col('Class').count(),
Students=pl.col('StudentName').str.join('; '))
.plot.rect(
x=alt.X('Day:O',
axis = alt.Axis(labelAngle=0), # default vertical labels
sort=days_list,
scale=alt.Scale(domain=days_list), title=''),
y=alt.Y('Time:O',
title='time of day',
scale=alt.Scale(domain=times_list),
axis=alt.Axis(labelOverlap=True)), # confusing naming
color=alt.Color('Count:Q',
scale=alt.Scale(scheme='blues'),
legend=alt.Legend(title='# students')),
tooltip=['Day','Time','Count','Students']
).properties(
width=180,
height=500,
title=advisor
)
)(class_day_time_df
.filter(pl.col('AdvisorName')!='No Advisor')
.group_by(pl.col('AdvisorName','Day','Time'))
.agg(Count=pl.col('Class').count(),
Students=pl.col('StudentName').str.join('; '))
.plot.rect(
x=alt.X('Day:O',
axis = alt.Axis(labelAngle=0),
sort=days_list,
scale=alt.Scale(domain=tuple(days_list)), title=''),
y=alt.Y('Time:O',
title='time of day',
scale=alt.Scale(domain=times_list),
axis=alt.Axis(labelOverlap='parity')),
color=alt.Color('Count:Q',
scale=alt.Scale(scheme='blues'),
legend=alt.Legend(title='# students')),
tooltip=['Day','Time','Count','Students']
).properties(
width=150, height=300, title=advisor
).facet(
facet="AdvisorName",
columns=5
)
)